
* Reset Stata's memory before building the panels
clear all
* Let output scroll without stopping at the more prompt
set more off

* Root data directory; the script reads from its temp subfolder and writes to its regdata subfolder
global dir /Volumes/Zihao_SSD2/PatentsView

*** ================================================================================================
*** Shared builder for the robustness regression panels (Tables B7, B8, B9)
***
*** Usage: build_reg_panel src dest
***   src  : path of the omission-panel CSV to read
***   dest : path of the regression-panel CSV to write (replaced if it already exists)
***
*** Reads src, merges in citing-patent (i), cited-patent (j) and examiner data,
*** generates the regression variables, and exports the result to dest.
*** Tables B7-B9 all run exactly these steps, so they share this one implementation.
*** Requires the global dir to be set. Replaces the dataset currently in memory.
*** ================================================================================================
capture program drop build_reg_panel
program define build_reg_panel
    args src dest

    * Both paths are required; stop early with a clear message otherwise
    if "`src'" == "" | "`dest'" == "" {
        display as error "build_reg_panel: expected two arguments (source csv, destination csv)"
        exit 198
    }

    import delimited "`src'", clear
    keep patent_id cited_patent_id omission sim_score
    rename (patent_id cited_patent_id) (patent_id_i patent_id_j)

    *** Merge with citing patents (i) and cited patents (j), keeping matched pairs only
    merge m:1 patent_id_i using "$dir/temp/patent_i.dta"
    keep if _merge == 3
    drop _merge
    merge m:1 patent_id_j using "$dir/temp/patent_j.dta"
    keep if _merge == 3
    drop _merge
    sort patent_id_i patent_id_j

    *** Append examiner information
    * The merge key is converted to string for this merge and back to numeric afterwards
    * (presumably the examiner file stores patent_id_i as a string -- TODO confirm)
    tostring patent_id_i, replace
    merge m:1 patent_id_i using "$dir/temp/g_examiner_gender_temp.dta"
    drop if _merge != 3
    drop _merge
    destring patent_id_i, replace

    *** Generate regression variables
    * same main_cpc_subclass (1 if equal, 0 otherwise)
    * NOTE(review): unlike the country and location flags below, this flag and same_assignee
    * evaluate to 1 when both sides are blank or missing -- confirm that is intended
    gen same_main_cpc = (main_cpc_subclass_i == main_cpc_subclass_j)

    * same assignee_country (requires both sides to be non-empty)
    gen same_assignee_country = (assignee_country_i == assignee_country_j & assignee_country_i != "" & assignee_country_j != "")

    * same assignee_location (requires both sides to be non-empty)
    gen same_assignee_location = (assignee_location_id_i == assignee_location_id_j & assignee_location_id_i != "" & assignee_location_id_j != "")

    * same assignee (1 if equal, 0 otherwise)
    gen same_assignee = (assignee_id_i == assignee_id_j)

    * years lag between citing patent and cited patent
    gen years_lag = patent_year_i - patent_year_j

    * KPSS commercial value (quality) variable (convert to dollar), then take logs
    * log() returns missing for non-positive or missing values
    foreach side in i j {
        gen xi_dollar_real_`side' = xi_real_`side' * 1000000
        gen xi_dollar_nominal_`side' = xi_nominal_`side' * 1000000
        gen dollar_real_log_`side' = log(xi_dollar_real_`side')
        gen dollar_nominal_log_`side' = log(xi_dollar_nominal_`side')
    }

    * Drop the list-type gender and race variables, which are not exported
    drop gender_09_100_list* gender_io_09_100_list* gender_09_50_list* gender_08_100_list* gender_08_50_list* gender_io_05_100_list* gender_io_06_100_list* gender_io_07_100_list* gender_io_08_100_list* race80_list*
    * Drop pairs whose similarity score is exactly 1
    * (presumably identical or self matches -- TODO confirm)
    drop if sim_score == 1

    * Generate gender interaction variables
    * In the mixed names, the first label refers to the cited patent (j), the second to the citing patent (i)
    gen allfemale_09_100_ji = allfemale_09_100_i * allfemale_09_100_j
    gen leadfemale_09_100_ji = leadfemale_09_100_i * leadfemale_09_100_j
    gen allfemale_leadfemale_09_100_ji = allfemale_09_100_j * leadfemale_09_100_i
    gen leadfemale_allfemale_09_100_ji = leadfemale_09_100_j * allfemale_09_100_i
    gen existfemale_09_100_ji = existfemale_09_100_i * existfemale_09_100_j
    gen allfemale_existfemale_09_100_ji = allfemale_09_100_j * existfemale_09_100_i
    gen existfemale_allfemale_09_100_ji = existfemale_09_100_j * allfemale_09_100_i

    *** Export final dataset for regression
    export delimited using "`dest'", replace
end
*** ================================================================================================



*** ================================================================================================
*** Table B7 (Different values of fixed k)
*** Runs k = 1 to 10 as a robustness check (k=5 is the baseline)
*** Zihao Li. 06/2024
*** ================================================================================================
forvalues k = 1/10 {
    build_reg_panel "$dir/temp/omission_panel`k'_robust.csv" "$dir/regdata/reg_panel`k'_robust.csv"
}
*** ================================================================================================



*** ================================================================================================
*** Table B8 (k = |actual citation list|)
build_reg_panel "$dir/temp/omission_panel_flex.csv" "$dir/regdata/reg_panel_flex.csv"
*** ================================================================================================



*** ================================================================================================
*** Table B9 (Construct omission panel based on KPSS firms)
build_reg_panel "$dir/temp/omission_panel_restrict.csv" "$dir/regdata/reg_panel_restrict.csv"
*** ================================================================================================

